#****************************************************
#RUN THIS AFTER RUNNING THE CODE CALLED "CLEANING"
#****************************************************




# REPLICATION CODE 

# ***************************************************************************
# PREPARATION
# ***************************************************************************

# First load the survey package. Activate the line below if you need to install
# the survey package as well. 

library(survey)
library(tidyverse)
library(StatMatch)
library(eeptools)
library(mvtnorm)
library(cowplot)



# ***************************************************************************
# METHODS PART A. REPLICATION
# ***************************************************************************

#### PART 1. Replication regressions 

#The code below generates descriptives using the appropriate
#survey weights. Note that a different weight is
#used for the general health outcomes than the
#mental health outcomes since those were only
#done among a subset of respondents, hence the two survey designs we create
#below. 

summary(sampledata_orig)
sampledata_orig$INTERVWMO<-as.factor(sampledata_orig$INTERVWMO)
sampledata_orig$REGION<-as.factor(sampledata_orig$REGION)
sampledata_orig$age_pol<-as.factor(sampledata_orig$age_pol)
sampledata_orig$SEX<-as.factor(sampledata_orig$SEX)
sampledata_orig$YEAR<-as.factor(sampledata_orig$YEAR)
sampledata_orig$temp<-as.factor(sampledata_orig$temp)
sampledata_orig$mood_dist<-as.factor(sampledata_orig$mood_dist)
sampledata_orig$poor_health<-as.factor(sampledata_orig$poor_health)
summary(sampledata_orig)



survDesign <- svydesign(id      = ~0,
                        strata  = NULL,
                        weights = ~PERWEIGHT,
                        nest    = FALSE,
                        data    = sampledata_orig)

survDesign2 <- svydesign(id      = ~0,
                         strata  = NULL,
                         weights = ~SAMPWEIGHT,
                         nest    = FALSE,
                         data    = sampledata_orig)



#these are the regression models. Note again that the MH outcomes
#use a different weight per NHIS guidelines which is why the survdesign2 and survdesign3
#are created

# Model R1
(healthOLS <- svyglm(health ~ age_pol+ temp+SEX+REGION+YEAR*INTERVWMO+elig1*post, design=survDesign))
summary(healthOLS, df.resid = degf(survDesign))
confint(healthOLS)
nobs(healthOLS)


# Model R3
survDesign3<-subset(survDesign2,!is.na(mood_dist))

(k6POIS <- svyglm(k6_index ~ age_pol+ temp+SEX+REGION+YEAR*INTERVWMO+elig1*post, design=survDesign3, family=poisson))
summary(k6POIS, df.resid = degf(survDesign3))
confint(k6POIS)
nobs(k6POIS)

#NOTE binary outcomes below are not included in our paper text to streamline discussion, but were in authors' original study
# Model R2
(poorhealthLOGIS <- svyglm(poor_health ~ age_pol+ temp+SEX+REGION+YEAR*INTERVWMO+elig1*post, design=survDesign, family=binomial (link="logit")))
summary(poorhealthLOGIS, df.resid = degf(survDesign))
confint(poorhealthLOGIS)
nobs(poorhealthLOGIS)

# Model R4
(moodLOGIS <- svyglm(mood_dist ~ age_pol+ temp+SEX+REGION+YEAR*INTERVWMO+elig1*post, design=survDesign3, family=binomial (link="logit")))
summary(moodLOGIS, df.resid = degf(survDesign3))
confint(moodLOGIS)
nobs(moodLOGIS)



# ***************************************************************************
# METHODS PART B. EXTENSIONS
# ***************************************************************************

#### PREP

# THESE FEW LINES CORRECT ISSUES WE FOUND IN THEIR ANALYSES.
# These are issues they have not yet detected and changed in their code.
# As such, we have left them unchanged in the replication above, but ammend 
# them here for our extensions.

#Fixing to remove people w/ missing values on education
sampledata <- sampledata_orig[ which(sampledata_orig$AGE>18&sampledata_orig$HISPETH==1 & sampledata_orig$non_citizen==1 
                             & (sampledata_orig$EDUCREC>=13  )
                             & (sampledata_orig$YRSINUS>2 ) ), ] 

#FIXING HOW THE K6 IS CALCULATED
sampledata$k6_index<-rowSums(sampledata[K6],na.rm=FALSE)
mean(sampledata$k6_index,na.rm=TRUE)

sampledata$mood_dist<-ifelse(sampledata$k6_index>=0 & sampledata$k6_index<=4, 0, ifelse(sampledata$k6_index>=5,1,NA))


#making even  more fixes from original paper
sampledata2<-sampledata

sampledata2$age_pol<-as.numeric(as.character((sampledata2$age_pol)))

#this fixes the ~100 people who had missing "years in US" but the authors code treated it as 9 years by accident
sampledata2$temp<-as.numeric(as.character((sampledata2$temp)))

sampledata2$temp<-ifelse(sampledata2$temp==9, NA, sampledata2$temp)


#this is then fixing the immigration < 16 flag to (1) use the right years-in-US flag above AND to use the rule
# of <16 rather than <= 16

sampledata2$imm16_fix<-NA
sampledata2$imm16_fix<-ifelse(sampledata2$foreign_born==0, 1, sampledata2$imm16_fix)
sampledata2$imm16_fix<-ifelse(sampledata2$AGE-sampledata2$temp<16, 1, sampledata2$imm16_fix)
sampledata2$imm16_fix<-ifelse(sampledata2$AGE-sampledata2$temp>=16, 0, sampledata2$imm16_fix)

sampledata2$temp<-as.factor(sampledata2$temp)


#this is using birth month / year rather than reported age to get a better / more accurate cutoff
#this then uses all the new info/fixes to recreate the elig1 flag, which is th emain eligibility indicator for the total control group
#note that I overwrote the old elig1 rather than creating a new var. Thats why i made a copy of the data (sampledata2) 
#rather than doing this on sampledata
sampledata2$birthy_cut<-ifelse(sampledata2$birthyr >= 1982 | (sampledata2$birthyr == 1981 & sampledata2$birthmo > 6), 1, 0)
sampledata2$elig1<-ifelse(sampledata2$imm16_fix==1 & sampledata2$non_citizen==1 & sampledata2$birthy_cut==1, 1, 0)
sampledata2$elig1<-ifelse(is.na(sampledata2$imm16_fix)==1 | is.na(sampledata2$birthy_cut)==1, NA, sampledata2$elig1)


#Create control group b), in code referred to as new_elig2 
#i.e. met age at policy introduction, not age at immigration
sampledata2$new_elig2<-ifelse(sampledata2$imm16_fix==1 & sampledata2$non_citizen==1 & 
                                (sampledata2$birthyr >= 1982 | (sampledata2$birthyr == 1981 & sampledata2$birthmo > 6)) , 1, 
                              ifelse( sampledata2$non_citizen==1  &
                                        (sampledata2$birthyr >= 1982 | (sampledata2$birthyr == 1981 & sampledata2$birthmo > 6)),0,NA))
sampledata2$new_elig2<-ifelse(is.na(sampledata2$imm16_fix)==1 | is.na(sampledata2$birthy_cut)==1, NA, sampledata2$new_elig2)


#Create control group a), in code referred to as new_elig3 
#i.e. met age at immigration, not age at policy introduction
sampledata2$new_elig3<-ifelse(sampledata2$imm16_fix==1 & sampledata2$non_citizen==1 & 
                                (sampledata2$birthyr >= 1982 | (sampledata2$birthyr == 1981 & sampledata2$birthmo > 6)) ,1, 
                              ifelse( sampledata2$non_citizen==1  & sampledata2$imm16_fix==1 ,0,NA))
sampledata2$new_elig3<-ifelse(is.na(sampledata2$imm16_fix)==1 | is.na(sampledata2$birthy_cut)==1, NA, sampledata2$new_elig3)



#Create control group c), in code referred to as new_elig4
#i.e. met neither criterion
sampledata2$new_elig4<-ifelse(sampledata2$imm16_fix==1 & sampledata2$non_citizen==1 & 
                                (sampledata2$birthyr >= 1982 | (sampledata2$birthyr == 1981 & sampledata2$birthmo > 6)) ,1,
                              ifelse( sampledata2$non_citizen==1  & sampledata2$imm16_fix==0 & 
                                        (sampledata2$birthyr < 1981 | (sampledata2$birthyr == 1981 & sampledata2$birthmo <= 6)),0,NA))
sampledata2$new_elig4<-ifelse(is.na(sampledata2$imm16_fix)==1 | is.na(sampledata2$birthy_cut)==1, NA, sampledata2$new_elig4)



survDesign <- svydesign(id      = ~0,
                        strata  = NULL,
                        weights = ~PERWEIGHT,
                        nest    = FALSE,
                        data    = sampledata2)

survDesign2 <- svydesign(id      = ~0,
                         strata  = NULL,
                         weights = ~SAMPWEIGHT,
                         nest    = FALSE,
                         data    = sampledata2)

#gives N for each group for table 3
sampledata2 %>%  
  filter(is.na(health)==FALSE&is.na(AGE)==FALSE&
           is.na(age_imm)==FALSE&is.na(SEX)==FALSE
         &is.na(INTERVWMO)==FALSE&is.na(REGION)==FALSE
         &is.na(age_pol)==FALSE&is.na(temp)==FALSE
         &is.na(YEAR)==FALSE&is.na(poor_health)==FALSE) %>%
  group_by(elig1, post) %>% 
  summarise(n=n()) %>%
  mutate(freq=n/sum(n))

sampledata2 %>%  
  filter(is.na(k6_index)==FALSE&is.na(AGE)==FALSE&
           is.na(age_imm)==FALSE&is.na(SEX)==FALSE
         &is.na(INTERVWMO)==FALSE&is.na(REGION)==FALSE
         &is.na(age_pol)==FALSE&is.na(temp)==FALSE
         &is.na(YEAR)==FALSE&is.na(mood_dist)==FALSE) %>%
  group_by(elig1, post) %>% 
  summarise(n=n()) %>%
  mutate(freq=n/sum(n))

# Table 3 Column 1
# Now that the survey designs are set, calculate the descriptives of interest 
# for those eligible for DACA, pre-DACA implementation 

eligpreDesign<-subset(survDesign,elig1==1&post==0)

svymean(~AGE, eligpreDesign, na.rm=TRUE)
svymean(~age_imm, eligpreDesign, na.rm=TRUE)
svymean(~poor_health, eligpreDesign, na.rm=TRUE)
svymean(~health, eligpreDesign, na.rm=TRUE)
svymean(~SEX, eligpreDesign, na.rm=TRUE)
svymean(~REGION, eligpreDesign, na.rm=TRUE)

# (We have to use the second survey design for mental health outcomes)
eligpreDesign2<-subset(survDesign2,elig1==1&post==0&is.na(k6_index)==F)
svymean(~k6_index, eligpreDesign2, na.rm=TRUE)
svymean(~mood_dist, eligpreDesign2, na.rm=TRUE)


# Table 3 Column 2
# calculate the descriptives of interest 
# for those eligible for DACA, post-DACA implementation 

eligpostDesign<-subset(survDesign,elig1==1&post==1)

svymean(~AGE, eligpostDesign, na.rm=TRUE)
svymean(~age_imm, eligpostDesign, na.rm=TRUE)
svymean(~poor_health, eligpostDesign, na.rm=TRUE)
svymean(~health, eligpostDesign, na.rm=TRUE)
svymean(~SEX, eligpostDesign, na.rm=TRUE)
svymean(~REGION, eligpostDesign, na.rm=TRUE)


eligpostDesign2<-subset(survDesign2,elig1==1&post==1&is.na(k6_index)==F)
svymean(~k6_index, eligpostDesign2, na.rm=TRUE)
svymean(~mood_dist, eligpostDesign2, na.rm=TRUE)


# Table 3 Column 3
# calculate the descriptives of interest 
# for those not eligible for DACA, pre-DACA implementation

noneligpreDesign<-subset(survDesign,elig1==0&post==0)

svymean(~AGE, noneligpreDesign, na.rm=TRUE)
svymean(~age_imm, noneligpreDesign, na.rm=TRUE)
svymean(~poor_health, noneligpreDesign, na.rm=TRUE)
svymean(~health, noneligpreDesign, na.rm=TRUE)
svymean(~SEX, noneligpreDesign, na.rm=TRUE)
svymean(~REGION, noneligpreDesign, na.rm=TRUE)


noneligpreDesign2<-subset(survDesign2,elig1==0&post==0&is.na(k6_index)==F)
svymean(~k6_index, noneligpreDesign2, na.rm=TRUE)
svymean(~mood_dist, noneligpreDesign2, na.rm=TRUE)


# Table 3 Column 4
# calculate the descriptives of interest 
# for those not eligible for DACA, post-DACA implementation

noneligpostDesign<-subset(survDesign,elig1==0&post==1)

svymean(~AGE, noneligpostDesign, na.rm=TRUE)
svymean(~age_imm, noneligpostDesign, na.rm=TRUE)
svymean(~poor_health, noneligpostDesign, na.rm=TRUE)
svymean(~health, noneligpostDesign, na.rm=TRUE)
svymean(~SEX, noneligpostDesign, na.rm=TRUE)
svymean(~REGION, noneligpostDesign, na.rm=TRUE)


noneligpostDesign2<-subset(survDesign2,elig1==0&post==1&is.na(k6_index)==F)
svymean(~k6_index, noneligpostDesign2, na.rm=TRUE)
svymean(~mood_dist, noneligpostDesign2, na.rm=TRUE)



###############################################################################

#### i. Repeating regressions with alternative control groups

###NOTE: the simulation code to generate risk differences (figures 2 and 3)
###is provided separately because it is very long. This code gives IRRs for the POisson
###model, which is the original author's QOI

#PREPPING VARS FOR REGRESSIONS
sampledata2$INTERVWMO<-as.factor(sampledata2$INTERVWMO)
sampledata2$REGION<-as.factor(sampledata2$REGION)
sampledata2$SEX<-as.factor(sampledata2$SEX)
sampledata2$YEAR<-as.factor(sampledata2$YEAR)
sampledata2$temp<-as.factor(sampledata2$temp)
sampledata2$mood_dist<-as.factor(sampledata2$mood_dist)
sampledata2$poor_health<-as.factor(sampledata2$poor_health)



#CREATING THE VARIOUS CONTROL GROUPS TO EXPLORE HETEROGENEITY IN THE CONTROL 
#GROUP SINCE THERE ARE 2 CRITERIA TO BE INCLUDED
#SOME PEOPLE ONLY MEET THE AGE AT IMMIGRAITON CRITERIA, 
#OTHERS ONLY MEET THE AGE AT POLICY CRITERIA, SOME MEET NEITHER
#SO WE RAN THE ORIGINAL REGRESSIONS LOOKING AT EACH OF THESE


# ADOPT survDesign2 FOR MENTAL HEALTH OUTCOMES 
survDesign2 <- svydesign(id      = ~0,
                         strata  = NULL,
                         weights = ~SAMPWEIGHT,
                         nest    = FALSE,
                         data    = sampledata2)


#RUN REGRESSIONS FOR K6 OUTCOME 

#Repeat R1
survDesign3.1<-subset(survDesign2,!is.na(mood_dist))

(k6POIS <- svyglm(k6_index ~ age_pol+ temp+SEX+REGION+YEAR+INTERVWMO+elig1*post, design=survDesign3.1, family=poisson))
summary(k6POIS, df.resid = degf(survDesign3.1))
confint(k6POIS)
nobs(k6POIS)

#Model E3b
survDesign3.2<-subset(survDesign2,!is.na(mood_dist) & !is.na(new_elig2))

(k6POIS <- svyglm(k6_index ~ age_pol+ temp+SEX+REGION+YEAR+INTERVWMO+new_elig2*post, design=survDesign3.2, family=poisson))
summary(k6POIS, df.resid = degf(survDesign3.2))
confint(k6POIS)
nobs(k6POIS)

#Model E3a
survDesign3.3<-subset(survDesign2,!is.na(mood_dist) & !is.na(new_elig3))

(k6POIS <- svyglm(k6_index ~ age_pol+ temp+SEX+REGION+YEAR+INTERVWMO+new_elig3*post, design=survDesign3.3, family=poisson))
summary(k6POIS, df.resid = degf(survDesign3.3))
confint(k6POIS)
nobs(k6POIS)

#Model E3c
survDesign3.4<-subset(survDesign2,!is.na(mood_dist) & !is.na(new_elig4))

(k6POIS <- svyglm(k6_index ~ age_pol+ temp+SEX+REGION+YEAR+INTERVWMO+new_elig4*post, design=survDesign3.4, family=poisson))
summary(k6POIS, df.resid = degf(survDesign3.4))
confint(k6POIS)
nobs(k6POIS)


#NOTE: results for this outcome not in our paper
#RUN REGRESSIONS FOR BINARY MOD PSYCH DISTRESS OUTCOME

#Repeat R4
(moodLOGIS <- svyglm(mood_dist ~ age_pol+ temp+SEX+REGION+YEAR+INTERVWMO+elig1*post, design=survDesign3.1, family=binomial (link="logit")))
summary(moodLOGIS, df.resid = degf(survDesign3.1))
confint(moodLOGIS)
nobs(moodLOGIS)

#Model E4b
(moodLOGIS <- svyglm(mood_dist ~ age_pol+ temp+SEX+REGION+YEAR+INTERVWMO+new_elig2*post, design=survDesign3.2, family=binomial (link="logit")))
summary(moodLOGIS, df.resid = degf(survDesign3.2))
confint(moodLOGIS)
nobs(moodLOGIS)

#Model E4a
(moodLOGIS <- svyglm(mood_dist ~ age_pol+ temp+SEX+REGION+YEAR+INTERVWMO+new_elig3*post, design=survDesign3.3, family=binomial (link="logit")))
summary(moodLOGIS, df.resid = degf(survDesign3.3))
confint(moodLOGIS)
nobs(moodLOGIS)

#Model E4c
(moodLOGIS <- svyglm(mood_dist ~ age_pol+ temp+SEX+REGION+YEAR+INTERVWMO+new_elig4*post, design=survDesign3.4, family=binomial (link="logit")))
summary(moodLOGIS, df.resid = degf(survDesign3.4))
confint(moodLOGIS)
nobs(moodLOGIS)



# ADOPT survDesign for physical/overall health outcomes
survDesign <- svydesign(id      = ~0,
                        strata  = NULL,
                        weights = ~PERWEIGHT,
                        nest    = FALSE,
                        data    = sampledata2)


#RUN REGRESSIONS FOR OVERAL SELF-REPORTED HEALTH LIKERT SCALE

#Repeat R1
survDesign4.1<-subset(survDesign,!is.na(elig1))

(healthOLS <- svyglm(health ~ age_pol+ temp+SEX+REGION+YEAR+INTERVWMO+elig1*post, design=survDesign4.1))
summary(healthOLS, df.resid = degf(survDesign4.1))
confint(healthOLS)
nobs(healthOLS)

#Model E1b
survDesign4.2<-subset(survDesign,!is.na(new_elig2))

(healthOLS <- svyglm(health ~ age_pol+ temp+SEX+REGION+YEAR+INTERVWMO+new_elig2*post, design=survDesign4.2))
summary(healthOLS, df.resid = degf(survDesign4.2))
confint(healthOLS)
nobs(healthOLS)

#Model E1a
survDesign4.3<-subset(survDesign,!is.na(new_elig3))

(healthOLS <- svyglm(health ~ age_pol+ temp+SEX+REGION+YEAR+INTERVWMO+new_elig3*post, design=survDesign4.3))
summary(healthOLS, df.resid = degf(survDesign4.3))
confint(healthOLS)
nobs(healthOLS)

#Model E1c
survDesign4.4<-subset(survDesign,!is.na(new_elig4))

(healthOLS <- svyglm(health ~ age_pol+ temp+SEX+REGION+YEAR+INTERVWMO+new_elig4*post, design=survDesign4.4))
summary(healthOLS, df.resid = degf(survDesign4.4))
confint(healthOLS)
nobs(healthOLS)


#NOTE: results for outcome below not in our final paper version
#RUN REGRESSIONS FOR POOR/FAIR HEALTH BINARY VARIABLE

#Repeat R2
(poorhealthLOGIS <- svyglm(poor_health ~ age_pol+ temp+SEX+REGION+YEAR+INTERVWMO+elig1*post, design=survDesign4.1, family=binomial (link="logit")))
summary(poorhealthLOGIS, df.resid = degf(survDesign4.1))
confint(poorhealthLOGIS)
nobs(poorhealthLOGIS)

#Model E2b
(poorhealthLOGIS <- svyglm(poor_health ~ age_pol+ temp+SEX+REGION+YEAR+INTERVWMO+new_elig2*post, design=survDesign4.2, family=binomial (link="logit")))
summary(poorhealthLOGIS, df.resid = degf(survDesign4.2))
confint(poorhealthLOGIS)
nobs(poorhealthLOGIS)

#Model E2a
(poorhealthLOGIS <- svyglm(poor_health ~ age_pol+ temp+SEX+REGION+YEAR+INTERVWMO+new_elig3*post, design=survDesign4.3, family=binomial (link="logit")))
summary(poorhealthLOGIS, df.resid = degf(survDesign4.3))
confint(poorhealthLOGIS)
nobs(poorhealthLOGIS)

#Model E2c
(poorhealthLOGIS <- svyglm(poor_health ~ age_pol+ temp+SEX+REGION+YEAR+INTERVWMO+new_elig4*post, design=survDesign4.4, family=binomial (link="logit")))
summary(poorhealthLOGIS, df.resid = degf(survDesign4.4))
confint(poorhealthLOGIS)
nobs(poorhealthLOGIS)



###############################################################################

###############################################################################

#### ii. Inspecting parallel trends for DiD analysis


#######
# Now probe parallel trends assumption further. Below we regress i) health 
# status on time and the covariates in the model, with an interaction term for 
# time and eligibility group. The coefficient on this interaction term will 
# inform us as to whether there is a significant difference in health over time 
# between the treatment and control groups. 


##
# As we are only interested in the period before policy introduction here, 
# create new dataset with only the pre-policy data: 

sampledata2$prepolicy<- ifelse(sampledata2$yr_mo <= 2012.45, 1, 0)
pt_data <- sampledata2[ which(sampledata2$prepolicy == 1),] 

## HEALTH STATUS
survDesign_pt <- svydesign(id      = ~0,
                           strata  = NULL,
                           weights = ~PERWEIGHT,
                           nest    = FALSE,
                           data    = pt_data)

# Original control group
survDesign_pt_elig1<-subset(survDesign_pt,!is.na(elig1))

(pt_health_OLS <- svyglm(health ~ AGE+temp+SEX+REGION+yr_mo*elig1, design=survDesign_pt_elig1))
summary(pt_health_OLS, df.resid = degf(survDesign_pt_elig1))
confint(pt_health_OLS)
nobs(pt_health_OLS)

# Control group a) 
survDesign_pt_elig3<-subset(survDesign_pt,!is.na(new_elig3))

(pt_health_OLS_3 <- svyglm(health ~ AGE+temp+SEX+REGION+yr_mo*elig1, design=survDesign_pt_elig3))
summary(pt_health_OLS_3, df.resid = degf(survDesign_pt_elig3))
confint(pt_health_OLS_3)
nobs(pt_health_OLS_3)

# Control group b) 
survDesign_pt_elig2<-subset(survDesign_pt,!is.na(new_elig2))

(pt_health_OLS_b <- svyglm(health ~ AGE+temp+SEX+REGION+yr_mo*elig1, design=survDesign_pt_elig2))
summary(pt_health_OLS_b, df.resid = degf(survDesign_pt_elig2))
confint(pt_health_OLS_b)
nobs(pt_health_OLS_b)

# Control group c) 
survDesign_pt_elig4<-subset(survDesign_pt,!is.na(new_elig4))

(pt_health_OLS_c <- svyglm(health ~ AGE+temp+SEX+REGION+yr_mo*elig1, design=survDesign_pt_elig4))
summary(pt_health_OLS_c, df.resid = degf(survDesign_pt_elig4))
confint(pt_health_OLS_c)
nobs(pt_health_OLS_c)


########################################

## K6 outcome 
survDesign_pt2 <- svydesign(id      = ~0,
                            strata  = NULL,
                            weights = ~SAMPWEIGHT,
                            nest    = FALSE,
                            data    = pt_data)

#Original control group
survDesign_pt2_elig1<-subset(survDesign_pt2,!is.na(elig1))

(pt_k6_OLS <- svyglm(k6_index ~ AGE+temp+SEX+REGION+yr_mo*elig1, design=survDesign_pt2_elig1))
summary(pt_k6_OLS, df.resid = degf(survDesign_pt2_elig1))
confint(pt_k6_OLS)
nobs(pt_k6_OLS)

#Control group a)
survDesign_pt2_elig3<-subset(survDesign_pt2,!is.na(new_elig3))

(pt_k6_OLS_a <- svyglm(k6_index ~ AGE+temp+SEX+REGION+yr_mo*elig1, design=survDesign_pt2_elig3))
summary(pt_k6_OLS_a, df.resid = degf(survDesign_pt2_elig3))
confint(pt_k6_OLS_a)
nobs(pt_k6_OLS_a)

#Control group b)
survDesign_pt2_elig2<-subset(survDesign_pt2,!is.na(new_elig2))

(pt_k6_OLS_b <- svyglm(k6_index ~ AGE+temp+SEX+REGION+yr_mo*elig1, design=survDesign_pt2_elig2))
summary(pt_k6_OLS_b, df.resid = degf(survDesign_pt2_elig2))
confint(pt_k6_OLS_b)
nobs(pt_k6_OLS_b)

#Control group a)
survDesign_pt2_elig4<-subset(survDesign_pt2,!is.na(new_elig4))

(pt_k6_OLS_c <- svyglm(k6_index ~ AGE+temp+SEX+REGION+yr_mo*elig1, design=survDesign_pt2_elig4))
summary(pt_k6_OLS_c, df.resid = degf(survDesign_pt2_elig4))
confint(pt_k6_OLS_c)
nobs(pt_k6_OLS_c)


###############################################################################

#### iii. Changing ID strategy to RDD

#Looking at age eligibility cutoff

#this code examines around the age cutoff (31) for policy eligibility
#while the original authors' analysis has a good sized N for precision, the control group has
#lots of folks who are pretty irrelevant -- e.g., those who didn't qualify by either criteria
#So we also wanted to examine folks right near the cutoff (e.g., ages 28-35), all of whom met the immig age criteria



#uses group 3 aka group a) - met immig criteria but not age criteria, and then limits to folks close to cutoff



### MODEL E7 ###

#this one runs the same regression as main results, but only on ppl near cutoff. Does not find stronger effect and is actually
#slightly positive rather than negative
survDesign2 <- svydesign(id      = ~0,
                         strata  = NULL,
                         weights = ~SAMPWEIGHT,
                         nest    = FALSE,
                         data    = sampledata2)

survDesign3.3.rdd<-subset(survDesign2,!is.na(mood_dist) & !is.na(new_elig3)& age_pol %in% c(27,28,29,30,31,32,33,34))

#this simulates the actual difference rather than the IRR
(k6POIS <- svyglm(k6_index ~ age_pol+new_elig3*post+SEX+REGION+YEAR+INTERVWMO, design=survDesign3.3.rdd, family=poisson))
summary(k6POIS, df.resid = degf(survDesign3.3.rdd))
confint(k6POIS)
nobs(k6POIS)
set.seed(02138)
sims<- rmvnorm(5000, mean = coef(k6POIS),
               sigma = vcov(k6POIS))
vals_data00 <- data.frame(model.matrix(k6POIS))
colnames(vals_data00)
colnames(sims)
dim(sims)
dim(vals_data00)

vals_data00<-vals_data00 
vals_data00$post<-0
vals_data00$new_elig3<-0
vals_data00$new_elig3.post<-0
# vals_data$age_pol_cent<-i
vals_data00<-as.matrix(vals_data00)

X_beta <- t(vals_data00%*%t(sims))
expect_00 <-exp(X_beta)
expect_00_means<-rowMeans(expect_00)
mean(expect_00_means)


vals_data01 <- data.frame(model.matrix(k6POIS))
vals_data01<-vals_data01
vals_data01$post<-1
vals_data01$new_elig3<-0
vals_data01$new_elig3.post<-0
# vals_data$age_pol_cent<-i
vals_data01<-as.matrix(vals_data01)

X_beta <- t(vals_data01%*%t(sims))
expect_01 <-exp(X_beta)
expect_01_means<-rowMeans(expect_01)
mean(expect_01_means)

vals_data10 <- data.frame(model.matrix(k6POIS))
vals_data10<-vals_data10 
vals_data10$post<-0
vals_data10$new_elig3<-1
vals_data10$new_elig3.post<-0
# vals_data$age_pol_cent<-i
vals_data10<-as.matrix(vals_data10)

X_beta <- t(vals_data10%*%t(sims))
expect_10 <-exp(X_beta)
expect_10_means<-rowMeans(expect_10)
mean(expect_10_means)

vals_data11 <- data.frame(model.matrix(k6POIS))
vals_data11<-vals_data11
vals_data11$post<-1
vals_data11$new_elig3<-1
vals_data11$new_elig3.post<-1
# vals_data$age_pol_cent<-i
vals_data11<-as.matrix(vals_data11)

X_beta <- t(vals_data11%*%t(sims))
expect_11 <-exp(X_beta)
expect_11_means<-rowMeans(expect_11)
mean(expect_11_means)

diffs<-(expect_11_means-expect_10_means)-(expect_01_means-expect_00_means)
mean(diffs)
quantile(diffs,c(0.025))
quantile(diffs,c(0.975))


IRRs<-(expect_11_means/expect_10_means)/(expect_01_means/expect_00_means)
mean(IRRs)
quantile(IRRs,c(0.025))
quantile(IRRs,c(0.975))




### MODEL E8 ###
#expanding window slightly

survDesign2 <- svydesign(id      = ~0,
                         strata  = NULL,
                         weights = ~SAMPWEIGHT,
                         nest    = FALSE,
                         data    = sampledata2)

survDesign3.3.rdd<-subset(survDesign2,!is.na(mood_dist) & !is.na(new_elig3)& age_pol %in% c(25,26,27,28,29,30,31,32,33,34,35,36))


(k6POIS <- svyglm(k6_index ~ age_pol+new_elig3*post+SEX+REGION+YEAR+INTERVWMO, design=survDesign3.3.rdd, family=poisson))
summary(k6POIS, df.resid = degf(survDesign3.3.rdd))
confint(k6POIS)
nobs(k6POIS)

#simulates risk diff rather than IRR from regression
set.seed(02138)
sims<- rmvnorm(5000, mean = coef(k6POIS),
               sigma = vcov(k6POIS))
vals_data00 <- data.frame(model.matrix(k6POIS))
colnames(vals_data00)
colnames(sims)
dim(sims)
dim(vals_data00)

vals_data00<-vals_data00 
vals_data00$post<-0
vals_data00$new_elig3<-0
vals_data00$new_elig3.post<-0
# vals_data$age_pol_cent<-i
vals_data00<-as.matrix(vals_data00)

X_beta <- t(vals_data00%*%t(sims))
expect_00 <-exp(X_beta)
expect_00_means<-rowMeans(expect_00)
mean(expect_00_means)


vals_data01 <- data.frame(model.matrix(k6POIS))
vals_data01<-vals_data01
vals_data01$post<-1
vals_data01$new_elig3<-0
vals_data01$new_elig3.post<-0
# vals_data$age_pol_cent<-i
vals_data01<-as.matrix(vals_data01)

X_beta <- t(vals_data01%*%t(sims))
expect_01 <-exp(X_beta)
expect_01_means<-rowMeans(expect_01)
mean(expect_01_means)


vals_data10 <- data.frame(model.matrix(k6POIS))
vals_data10<-vals_data10 
vals_data10$post<-0
vals_data10$new_elig3<-1
vals_data10$new_elig3.post<-0
# vals_data$age_pol_cent<-i
vals_data10<-as.matrix(vals_data10)

X_beta <- t(vals_data10%*%t(sims))
expect_10 <-exp(X_beta)
expect_10_means<-rowMeans(expect_10)
mean(expect_10_means)

vals_data11 <- data.frame(model.matrix(k6POIS))
vals_data11<-vals_data11
vals_data11$post<-1
vals_data11$new_elig3<-1
vals_data11$new_elig3.post<-1
# vals_data$age_pol_cent<-i
vals_data11<-as.matrix(vals_data11)

X_beta <- t(vals_data11%*%t(sims))
expect_11 <-exp(X_beta)
expect_11_means<-rowMeans(expect_11)
mean(expect_11_means)


diffs<-(expect_11_means-expect_10_means)-(expect_01_means-expect_00_means)
mean(diffs)
quantile(diffs,c(0.025))
quantile(diffs,c(0.975))


IRRs<-(expect_11_means/expect_10_means)/(expect_01_means/expect_00_means)
mean(IRRs)
quantile(IRRs,c(0.025))
quantile(IRRs,c(0.975))



### MODEL E9 ###

#traditional RDD , in post period only

#data needs to be put into format that can be run in traditional RDD (ie center running var at 0)
sampledata_rd<-sampledata2

sampledata_rd$birthmo<-ifelse(is.na(sampledata_rd$birthmo)==1,6,sampledata_rd$birthmo)
sampledata_rd$bday<-paste(sampledata_rd$birthyr,sampledata_rd$birthmo,"01", sep="-")
sampledata_rd$bday<-as.Date(sampledata_rd$bday)
pol<-as.Date(c("2012-06-12"))

sampledata_rd<-subset(sampledata_rd, is.na(elig1)==0)
sampledata_rd$age_calc <- floor(age_calc(sampledata_rd$bday, enddate=pol, units = "years"))


sampledata_rd$age_calc_cent<-sampledata_rd$age_calc-30
sampledata_rd$over31<--1*(sampledata_rd$new_elig3-1)

# First for mental health K6 variable
survDesign_RD <- svydesign(id      = ~0,
                           strata  = NULL,
                           weights = ~SAMPWEIGHT,
                           nest    = FALSE,
                           data    = sampledata_rd)

survDesign3.3.rd<-subset(survDesign_RD,!is.na(mood_dist) & !is.na(new_elig3) & post==1 & age_calc>=25 & age_calc<=36 )

(k6POIS_cutoff <- svyglm(k6_index ~  age_calc_cent*over31, design=survDesign3.3.rd, family="poisson"))
summary(k6POIS_cutoff, df.resid = degf(survDesign3.3.rd))
exp(k6POIS_cutoff$coefficients)
exp(confint(k6POIS_cutoff))
nobs(k6POIS_cutoff)

#simulates effect rather than giving IRR

set.seed(02138)
sims<- rmvnorm(5000, mean = coef(k6POIS_cutoff),
               sigma = vcov(k6POIS_cutoff))
vals_data00 <- data.frame(model.matrix(k6POIS_cutoff))
colnames(vals_data00)
colnames(sims)
dim(sims)
dim(vals_data00)

vals_data00<-vals_data00 
vals_data00$over31<-0
vals_data00$age_calc_cent.over31<-0
vals_data00$age_calc_cent<-0
# vals_data$age_pol_cent<-i
vals_data00<-as.matrix(vals_data00)

X_beta <- t(vals_data00%*%t(sims))
expect_00 <-exp(X_beta)
expect_00_means<-rowMeans(expect_00)
mean(expect_00_means)


vals_data01 <- data.frame(model.matrix(k6POIS_cutoff))
vals_data01<-vals_data01
vals_data01$over31<-1
vals_data01$age_calc_cent.over31<-1
vals_data01$age_calc_cent<-1
# vals_data$age_pol_cent<-i
vals_data01<-as.matrix(vals_data01)

X_beta <- t(vals_data01%*%t(sims))
expect_01 <-exp(X_beta)
expect_01_means<-rowMeans(expect_01)
mean(expect_01_means)

diffs<-(expect_01_means-expect_00_means)
mean(diffs)
quantile(diffs,c(0.025))
quantile(diffs,c(0.975))




### MODEL E10 ###
#traditional RDD but in pre-period as falsification

survDesign3.3.rd<-subset(survDesign_RD,!is.na(mood_dist) & !is.na(new_elig3) & post==0 & age_calc>=25 & age_calc<=36)

(k6POIS_cutoff <- svyglm(k6_index ~  age_calc_cent*over31, design=survDesign3.3.rd, family="poisson"))
summary(k6POIS_cutoff, df.resid = degf(survDesign3.3.rd))
exp(k6POIS_cutoff$coefficients)
exp(confint(k6POIS_cutoff))
nobs(k6POIS_cutoff)


#simulating effect rarther than IRR from reg output
set.seed(02138)
sims<- rmvnorm(5000, mean = coef(k6POIS_cutoff),
               sigma = vcov(k6POIS_cutoff))
vals_data00 <- data.frame(model.matrix(k6POIS_cutoff))
colnames(vals_data00)
colnames(sims)
dim(sims)
dim(vals_data00)

vals_data00<-vals_data00 
vals_data00$over31<-0
vals_data00$age_calc_cent.over31<-0
vals_data00$age_calc_cent<-0
# vals_data$age_pol_cent<-i
vals_data00<-as.matrix(vals_data00)

X_beta <- t(vals_data00%*%t(sims))
expect_00 <-exp(X_beta)
expect_00_means<-rowMeans(expect_00)
mean(expect_00_means)


vals_data01 <- data.frame(model.matrix(k6POIS_cutoff))
vals_data01<-vals_data01
vals_data01$over31<-1
vals_data01$age_calc_cent.over31<-1
vals_data01$age_calc_cent<-1
# vals_data$age_pol_cent<-i
vals_data01<-as.matrix(vals_data01)

X_beta <- t(vals_data01%*%t(sims))
expect_01 <-exp(X_beta)
expect_01_means<-rowMeans(expect_01)
mean(expect_01_means)

diffs<-(expect_01_means-expect_00_means)
mean(diffs)
quantile(diffs,c(0.025))
quantile(diffs,c(0.975))









### "MODEL(s)" E12 ###
#assessing covariate balance for group around cutoff (fig 6)

library(tidyverse)
library(StatMatch)
covars<-c("YEAR","INTERVWMO","SEX","REGION","temp")

#we tried matching folks at the cutoff, but had trouble improving balance with the small sample
keep<-c("new_elig3","post","k6_index","mood_dist","AGE","age_pol","age_imm",covars)
formatch<-sampledata2[keep]

formatch$age_pol<-as.numeric(as.character(formatch$age_pol))


formatch<- formatch %>% 
  filter(is.na(k6_index)==FALSE&is.na(AGE)==FALSE&
           is.na(age_imm)==FALSE&is.na(SEX)==FALSE
         &is.na(INTERVWMO)==FALSE&is.na(REGION)==FALSE
         &is.na(age_pol)==FALSE&is.na(temp)==FALSE
         &is.na(YEAR)==FALSE&is.na(mood_dist)==FALSE & post==1 & is.na(new_elig3)==FALSE & age_pol>=25 & age_pol<=36) # & imm16==1 


reg<-data.frame(model.matrix(~formatch$REGION))
reg<-reg[-c(1)]

# year<-data.frame(model.matrix(~formatch$YEAR))
# year<-year[-c(1)]

int<-data.frame(model.matrix(~formatch$INTERVWMO))
int<-int[-c(1)]

# te<-data.frame(model.matrix(~formatch$temp))
# te<-te[-c(1)]
formatch$YEAR<-as.numeric(as.character(formatch$YEAR))

formatch$SEX<-as.numeric(as.character(formatch$SEX))

formatch<-data.frame(formatch,int,reg)
# drop<-c("YEAR","REGION","temp")
# 
# formatch<-formatch[,!(names(formatch) %in% drop)]

# formatch$new_elig3<-(as.character(formatch$new_elig3))

table(formatch$new_elig3)


cov_means <- formatch  %>%
  group_by(new_elig3) %>% 
  summarise(across
            (-one_of('post',"k6_index","mood_dist","AGE","INTERVWMO","REGION","temp","age_imm","age_pol"), mean)) %>%
  t() %>%
  as_tibble(rownames = 'var') %>%
  slice(-1) %>%
  rename(mean_control = 2,
         mean_treated = 3)
cov_means

cov_var <- formatch %>% 
  group_by(new_elig3) %>% 
  summarise(across (-one_of('post',"k6_index","mood_dist","AGE","INTERVWMO","REGION","temp","age_imm","age_pol"), mean))  %>%
  t() %>%
  as_tibble(rownames = 'var') %>%
  slice(-1) %>%
  rename(var_control = 2,
         var_treated = 3)

## Combine 
## Calculate standardized mean difference (SMD)

bal_pre <- cov_means %>% 
  left_join(cov_var) %>%
  mutate(mean_diff = mean_treated - mean_control,
         SMD = mean_diff / sqrt((var_control + var_treated)/2)) %>%
  mutate(type = 'Before matching')

## Plot normalized mean difference for all covariates 

ggplot(bal_pre, aes(x = var, y = SMD)) + 
  geom_bar(stat = 'identity',
           col = 'black') +
  coord_flip() + 
  labs(y = 'Normalized difference',
       x = 'Variable') + 
  theme_bw() + ylim(-1,1)


#############################
#CODE TO CREATE FIGURE 1
############################
plotd_post<-subset(sampledata2, post==1 & is.na(elig1)==0)

plotd_post$alle<-ifelse(plotd_post$elig1==1, 1, NA)
plotd_post$alle<-ifelse(plotd_post$new_elig2==0 & is.na(plotd_post$alle)==1, 2, plotd_post$alle)
plotd_post$alle<-ifelse(plotd_post$new_elig3==0   & is.na(plotd_post$alle)==1, 3, plotd_post$alle)
plotd_post$alle<-ifelse(plotd_post$new_elig4==0   & is.na(plotd_post$alle)==1, 4, plotd_post$alle)


plotd_post$alle<-as.factor(plotd_post$alle)




plotd_post$birthmo<-ifelse(is.na(plotd_post$birthmo)==1,6,plotd_post$birthmo)
plotd_post$bday<-paste(plotd_post$birthyr,plotd_post$birthmo,"01", sep="-")
plotd_post$bday<-as.Date(plotd_post$bday)
pol<-as.Date(c("2012-06-12"))

plotd_post<-subset(plotd_post, is.na(elig1)==0)
plotd_post$age_calc <- floor(age_calc(plotd_post$bday, enddate=pol, units = "years"))




plotd_post$label<-ifelse(plotd_post$alle==1,"Treated group (DACA-elig)",NA)
plotd_post$label<-ifelse(plotd_post$alle==2,"Met policy age but not immigration age",plotd_post$label)
plotd_post$label<-ifelse(plotd_post$alle==3,"Met imm age, but not policy age",plotd_post$label)
plotd_post$label<-ifelse(plotd_post$alle==4,"Met neither criteria",plotd_post$label)



ggplot(plotd_post, aes(x=age_calc, y=age_imm,  color=alle)) + 
  
  scale_color_manual(labels =  c("Treated group (DACA-elig)","Met policy age but not immigration age (b)","Met imm age, but not policy age (a)","Met neither criteria (c)"),
                     values=c("#F8766D","#7CAE00","#00BFC4","#C77CFF"))+
  geom_count(alpha = 0.75)  +xlab("Age at Policy")+ylab("Age at immigration") +
  ggtitle("Visual Depiction of Respondents -- Treated and Control Groups") +
  theme(
    legend.title = element_text( size = 20),
    legend.text = element_text(size = 16)
  )+ guides(colour = guide_legend(override.aes = list(size=7),title="Group"))


###################
#CODE FOR FIGURES 4 AND 5 -- SHOWING CRUDE RDD ESTIMATES
###################


sampledata2$k6_index<-as.numeric(as.character((sampledata2$k6_index)))

agg <- plotd_post %>% 
  group_by(alle, age_calc) %>% 
  summarise(mean = mean(k6_index, na.rm=TRUE), n = n(), 
            sd = sd(k6_index, na.rm=TRUE),  se = sd/sqrt(n), up=mean+1.96*se, low=mean-1.96*se )

agg<-subset(agg,(n>10 & alle==3)| (n>10 &  alle==1 & age_calc>=25))

k6post<-ggplot(subset(agg, agg$alle %in% c(1,3) ), aes(x=age_calc, y=mean,   color=alle)) +
  geom_line(alpha = 0.9) + geom_count(alpha = 0.9, size=1) +
  geom_smooth(method='lm', se=F) +xlim(25,36)+ ylim(0,4)+
  scale_color_manual(labels =  c("Treated group (DACA-elig)","Met immig age but not policy age"),
                     values=c("#F8766D","#00BFC4"))+
  xlab("Age at Policy")+ylab("Mean outcome")+guides(color=guide_legend(title="Group"))+theme(legend.position = c(0.2, 0.1))



agg <- plotd_post %>% 
  group_by(alle, age_calc) %>% 
  summarise(mean = mean(health, na.rm=TRUE), n = n(), 
            sd = sd(health, na.rm=TRUE),  se = sd/sqrt(n), up=mean+1.96*se, low=mean-1.96*se )

agg<-subset(agg,(n>10 & alle==3)| (n>10 &  alle==1 & age_calc>=25))

healthpost<-ggplot(subset(agg, agg$alle %in% c(1,3) ), aes(x=age_calc, y=mean,   color=alle)) +
  geom_line(alpha = 0.9) + geom_count(alpha = 0.9, size=1) +
  geom_smooth(method='lm', se=F) +xlim(25,36)+ ylim(3.5,4.2)+
  scale_color_manual(labels =  c("Treated group (DACA-elig)","Met immig age but not policy age"),
                     values=c("#F8766D","#00BFC4"))+
  xlab("Age at Policy")+ylab("Mean outcome")+guides(color=guide_legend(title="Group"))+theme(legend.position = c(0.2, 0.1))



plot_grid(healthpost, k6post, labels = c('Health scale (post-DACA)', 'K6 score (post-DACA)'))





plotd_pre<-subset(sampledata2, post==0 & is.na(elig1)==0)

plotd_pre$alle<-ifelse(plotd_pre$elig1==1, 1, NA)
plotd_pre$alle<-ifelse(plotd_pre$new_elig2==0 & is.na(plotd_pre$alle)==1, 2, plotd_pre$alle)
plotd_pre$alle<-ifelse(plotd_pre$new_elig3==0   & is.na(plotd_pre$alle)==1, 3, plotd_pre$alle)
plotd_pre$alle<-ifelse(plotd_pre$new_elig4==0   & is.na(plotd_pre$alle)==1, 4, plotd_pre$alle)


plotd_pre$alle<-as.factor(plotd_pre$alle)

plotd_pre$birthmo<-ifelse(is.na(plotd_pre$birthmo)==1,6,plotd_pre$birthmo)
plotd_pre$bday<-paste(plotd_pre$birthyr,plotd_pre$birthmo,"01", sep="-")
plotd_pre$bday<-as.Date(plotd_pre$bday)
pol<-as.Date(c("2012-06-12"))

plotd_pre<-subset(plotd_pre, is.na(elig1)==0)
plotd_pre$age_calc <- floor(age_calc(plotd_pre$bday, enddate=pol, units = "years"))



sampledata2$k6_index<-as.numeric(as.character((sampledata2$k6_index)))

agg <- plotd_pre %>% 
  group_by(alle, age_calc) %>% 
  summarise(mean = mean(k6_index, na.rm=TRUE), n = n(), 
            sd = sd(k6_index, na.rm=TRUE),  se = sd/sqrt(n), up=mean+1.96*se, low=mean-1.96*se )

agg<-subset(agg,(n>10 & alle==3 & age_calc <=36)| (n>10 &  alle==1 & age_calc>=25))

k6pre<-ggplot(subset(agg, agg$alle %in% c(1,3) ), aes(x=age_calc, y=mean,   color=alle)) +
  geom_line(alpha = 0.9) + geom_count(alpha = 0.9, size=1) +
  geom_smooth(method='lm', se=F)  +xlim(25,36)+ ylim(0,4) +
  scale_color_manual(labels =  c("Treated group (DACA-elig)","Met immig age but not policy age"),
                     values=c("#F8766D","#00BFC4"))+
  xlab("Age at Policy")+ylab("Mean outcome")+guides(color=guide_legend(title="Group"))+theme(legend.position = c(0.2, 0.1))


agg <- plotd_pre %>% 
  group_by(alle, age_calc) %>% 
  summarise(mean = mean(health, na.rm=TRUE), n = n(), 
            sd = sd(health, na.rm=TRUE),  se = sd/sqrt(n), up=mean+1.96*se, low=mean-1.96*se )

agg<-subset(agg,(n>10 & alle==3 & age_calc <=36)| (n>10 &  alle==1 & age_calc>=25))

healthpre<-ggplot(subset(agg, agg$alle %in% c(1,3) ), aes(x=age_calc, y=mean,   color=alle)) +
  geom_line(alpha = 0.9) + geom_count(alpha = 0.9, size=1) +
  geom_smooth(method='lm', se=F) +xlim(25,36)+ ylim(3.5,4.2)+
  scale_color_manual(labels =  c("Treated group (DACA-elig)","Met immig age but not policy age"),
                     values=c("#F8766D","#00BFC4"))+
  xlab("Age at Policy")+ylab("Mean outcome")+guides(color=guide_legend(title="Group"))+theme(legend.position = c(0.2, 0.1))




plot_grid(healthpre, k6pre, labels = c('Health scale (pre-DACA)','K6 score (pre-DACA)'))




###############################################################################

#### iv. Employment as Outcome Variable 

# Looking at employment as an intermediate outcome on the causal pathway

table(sampledata2$EMPSTATWKYR)

# Generate binary variable for whether had job in last 12 months
# This includes a) job in last 2 weeks and b) no job in last 2 weeks but had in
# last 12 months. 

sampledata2$emp_year<-ifelse(sampledata2$EMPSTATWKYR >= 1 & sampledata2$EMPSTATWKYR <= 2, 1, ifelse(sampledata2$EMPSTATWKYR >= 3 & sampledata2$EMPSTATWKYR <= 4,0,NA))
table(sampledata2$emp_year, sampledata2$EMPSTATWKYR)
# The employment variable is weighted using SAMPWEIGHT and so I will use 
# survDesign2 here, and subset into the different control groups as before


## ORIGINAL (ALL CONTROLS)

survDesign2 <- svydesign(id      = ~0,
                         strata  = NULL,
                         weights = ~SAMPWEIGHT,
                         nest    = FALSE,
                         data    = sampledata2)

survDesign3.1<-subset(survDesign2,!is.na(emp_year))

employedLOGIS <- svyglm(emp_year ~ AGE+temp+SEX+REGION+YEAR+INTERVWMO+elig1*post, design=survDesign3.1, family=binomial (link="logit"))
summary(employedLOGIS, df.resid = degf(survDesign3.1))
confint(employedLOGIS)
nobs(employedLOGIS)


# Repeat for control group a

survDesign3.3<-subset(survDesign2,!is.na(emp_year) & !is.na(new_elig3))

employedLOGIS_a <- svyglm(emp_year ~ AGE+temp+SEX+REGION+YEAR+INTERVWMO+new_elig3*post, design=survDesign3.3, family=binomial (link="logit"))
summary(employedLOGIS_a, df.resid = degf(survDesign3.3))
confint(employedLOGIS_a)
nobs(employedLOGIS_a)

# Repeat for control group b 

survDesign3.2<-subset(survDesign2,!is.na(emp_year) & !is.na(new_elig2))

employedLOGIS_b <- svyglm(emp_year ~ AGE+temp+SEX+REGION+YEAR+INTERVWMO+new_elig2*post, design=survDesign3.2, family=binomial (link="logit"))
summary(employedLOGIS_b, df.resid = degf(survDesign3.2))
confint(employedLOGIS_b)
nobs(employedLOGIS_b)

# Repeat for control group c

survDesign3.4<-subset(survDesign2,!is.na(emp_year) & !is.na(new_elig4))

employedLOGIS_c <- svyglm(emp_year ~ AGE+temp+SEX+REGION+YEAR+INTERVWMO+new_elig4*post, design=survDesign3.4, family=binomial (link="logit"))
summary(employedLOGIS_c, df.resid = degf(survDesign3.4))
confint(employedLOGIS_c)
nobs(employedLOGIS_c)
